[CUBLAS] Update wrappers to use the ILP64 API #2845
Conversation
Your PR requires formatting changes to meet the project's style guidelines. The suggested changes:

diff --git a/lib/cublas/libcublas.jl b/lib/cublas/libcublas.jl
index 28d827e4f..2c03203a1 100644
--- a/lib/cublas/libcublas.jl
+++ b/lib/cublas/libcublas.jl
@@ -5914,15 +5914,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSHgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -5936,15 +5940,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -5958,15 +5966,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSTgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -5980,15 +5992,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSHgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6006,19 +6022,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSHgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Float16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Float16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6036,19 +6056,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6066,19 +6090,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSTgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{BFloat16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{BFloat16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -6096,19 +6124,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
@@ -6121,14 +6153,18 @@ end
C::Ptr{Float16}, ldc::Cint)::cublasStatus_t
end
-@checked function cublasHgemm_64(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc)
+@checked function cublasHgemm_64(
+ handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+ C, ldc
+ )
initialize_context()
- @ccall libcublas.cublasHgemm_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
- alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
- B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
- C::Ptr{Float16}, ldc::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemm_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
+ alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
+ B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
+ C::Ptr{Float16}, ldc::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmBatched(handle, transa, transb, m, n, k, alpha, Aarray, lda,
@@ -6144,17 +6180,21 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmBatched_64(handle, transa, transb, m, n, k, alpha, Aarray, lda,
- Barray, ldb, beta, Carray, ldc, batchCount)
+@checked function cublasHgemmBatched_64(
+ handle, transa, transb, m, n, k, alpha, Aarray, lda,
+ Barray, ldb, beta, Carray, ldc, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHgemmBatched_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
- beta::CuRef{Float16},
- Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemmBatched_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
+ beta::CuRef{Float16},
+ Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda,
@@ -6173,18 +6213,22 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmStridedBatched_64(handle, transa, transb, m, n, k, alpha, A, lda,
- strideA, B, ldb, strideB, beta, C, ldc, strideC,
- batchCount)
- initialize_context()
- @ccall libcublas.cublasHgemmStridedBatched_64(handle::cublasHandle_t,
- transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- A::CuPtr{Float16}, lda::Int64,
- strideA::Clonglong, B::CuPtr{Float16},
- ldb::Int64, strideB::Clonglong,
- beta::CuRef{Float16}, C::CuPtr{Float16},
- ldc::Int64, strideC::Clonglong,
- batchCount::Int64)::cublasStatus_t
+@checked function cublasHgemmStridedBatched_64(
+ handle, transa, transb, m, n, k, alpha, A, lda,
+ strideA, B, ldb, strideB, beta, C, ldc, strideC,
+ batchCount
+ )
+ initialize_context()
+ @ccall libcublas.cublasHgemmStridedBatched_64(
+ handle::cublasHandle_t,
+ transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ A::CuPtr{Float16}, lda::Int64,
+ strideA::Clonglong, B::CuPtr{Float16},
+ ldb::Int64, strideB::Clonglong,
+ beta::CuRef{Float16}, C::CuPtr{Float16},
+ ldc::Int64, strideC::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
diff --git a/lib/cublas/wrappers.jl b/lib/cublas/wrappers.jl
index 241e66a82..4a61e2b29 100644
--- a/lib/cublas/wrappers.jl
+++ b/lib/cublas/wrappers.jl
@@ -1116,7 +1116,7 @@ end
## (GE) general matrix-matrix multiplication
for (fname, fname_64, elty) in ((:cublasDgemm_v2, :cublasDgemm_v2_64, :Float64),
(:cublasSgemm_v2, :cublasSgemm_v2_64, :Float32),
- (:cublasHgemm, :cublasHgemm_64, :Float16),
+ (:cublasHgemm, :cublasHgemm_64, :Float16),
(:cublasZgemm_v2, :cublasZgemm_v2_64, :ComplexF64),
(:cublasCgemm_v2, :cublasCgemm_v2_64, :ComplexF32))
@eval begin
@@ -1527,7 +1527,7 @@ end
## (GE) general matrix-matrix multiplication batched
for (fname, fname_64, elty) in ((:cublasDgemmBatched, :cublasDgemmBatched_64, :Float64),
(:cublasSgemmBatched, :cublasSgemmBatched_64, :Float32),
- (:cublasHgemmBatched, :cublasHgemmBatched_64, :Float16),
+ (:cublasHgemmBatched, :cublasHgemmBatched_64, :Float16),
(:cublasZgemmBatched, :cublasZgemmBatched_64, :ComplexF64),
(:cublasCgemmBatched, :cublasCgemmBatched_64, :ComplexF32))
@eval begin
@@ -1594,7 +1594,7 @@ end
## (GE) general matrix-matrix multiplication strided batched
for (fname, fname_64, elty) in ((:cublasDgemmStridedBatched, :cublasDgemmStridedBatched_64, :Float64),
(:cublasSgemmStridedBatched, :cublasSgemmStridedBatched_64, :Float32),
- (:cublasHgemmStridedBatched, :cublasHgemmStridedBatched_64, :Float16),
+ (:cublasHgemmStridedBatched, :cublasHgemmStridedBatched_64, :Float16),
(:cublasZgemmStridedBatched, :cublasZgemmStridedBatched_64, :ComplexF64),
(:cublasCgemmStridedBatched, :cublasCgemmStridedBatched_64, :ComplexF32))
@eval begin
@@ -1946,10 +1946,12 @@ end
## (TR) Triangular matrix and vector multiplication and solution
for (mmname, mmname_64, elty) in
- ((:cublasDtrmm_v2, :cublasDtrmm_v2_64, :Float64),
- (:cublasStrmm_v2, :cublasStrmm_v2_64, :Float32),
- (:cublasZtrmm_v2, :cublasZtrmm_v2_64, :ComplexF64),
- (:cublasCtrmm_v2, :cublasCtrmm_v2_64, :ComplexF32))
+ (
+ (:cublasDtrmm_v2, :cublasDtrmm_v2_64, :Float64),
+ (:cublasStrmm_v2, :cublasStrmm_v2_64, :Float32),
+ (:cublasZtrmm_v2, :cublasZtrmm_v2_64, :ComplexF64),
+ (:cublasCtrmm_v2, :cublasCtrmm_v2_64, :ComplexF32),
+ )
@eval begin
# Note: CUBLAS differs from BLAS API for trmm
# BLAS: inplace modification of B
@@ -1983,10 +1985,12 @@ for (mmname, mmname_64, elty) in
end
for (smname, smname_64, elty) in
- ((:cublasDtrsm_v2, :cublasDtrsm_v2_64, :Float64),
- (:cublasStrsm_v2, :cublasStrsm_v2_64, :Float32),
- (:cublasZtrsm_v2, :cublasZtrsm_v2_64, :ComplexF64),
- (:cublasCtrsm_v2, :cublasCtrsm_v2_64, :ComplexF32))
+ (
+ (:cublasDtrsm_v2, :cublasDtrsm_v2_64, :Float64),
+ (:cublasStrsm_v2, :cublasStrsm_v2_64, :Float32),
+ (:cublasZtrsm_v2, :cublasZtrsm_v2_64, :ComplexF64),
+ (:cublasCtrsm_v2, :cublasCtrsm_v2_64, :ComplexF32),
+ )
@eval begin
function trsm!(side::Char,
uplo::Char,
diff --git a/res/wrap/libcublas_epilogue.jl b/res/wrap/libcublas_epilogue.jl
index f77958580..e32e20bfe 100644
--- a/res/wrap/libcublas_epilogue.jl
+++ b/res/wrap/libcublas_epilogue.jl
@@ -11,15 +11,19 @@
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSHgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Float16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -33,15 +37,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasHSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::CuRef{Cfloat},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- xarray::CuPtr{Ptr{Float16}}, incx::Int64,
- beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::CuRef{Cfloat},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ xarray::CuPtr{Ptr{Float16}}, incx::Int64,
+ beta::CuRef{Cfloat}, yarray::CuPtr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -55,15 +63,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSTgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{BFloat16}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvBatched(handle, trans, m, n, alpha, Aarray, lda, xarray,
@@ -77,15 +89,19 @@ end
incy::Cint, batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvBatched_64(handle, trans, m, n, alpha, Aarray, lda, xarray,
- incx, beta, yarray, incy, batchCount)
+@checked function cublasTSSgemvBatched_64(
+ handle, trans, m, n, alpha, Aarray, lda, xarray,
+ incx, beta, yarray, incy, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvBatched_64(handle::cublasHandle_t, trans::cublasOperation_t,
- m::Int64, n::Int64, alpha::Ptr{Cfloat},
- Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
- xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
- beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
- incy::Int64, batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvBatched_64(
+ handle::cublasHandle_t, trans::cublasOperation_t,
+ m::Int64, n::Int64, alpha::Ptr{Cfloat},
+ Aarray::Ptr{Ptr{BFloat16}}, lda::Int64,
+ xarray::Ptr{Ptr{BFloat16}}, incx::Int64,
+ beta::Ptr{Cfloat}, yarray::Ptr{Ptr{Cfloat}},
+ incy::Int64, batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSHgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -103,19 +119,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSHgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSHgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSHgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Float16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSHgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Float16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -133,19 +153,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasHSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{Float16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{Float16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{Float16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{Float16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSTgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -163,19 +187,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSTgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSTgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSTgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{BFloat16}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSTgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{BFloat16}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasTSSgemvStridedBatched(handle, trans, m, n, alpha, A, lda, strideA,
@@ -193,19 +221,23 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasTSSgemvStridedBatched_64(handle, trans, m, n, alpha, A, lda, strideA,
- x, incx, stridex, beta, y, incy, stridey,
- batchCount)
+@checked function cublasTSSgemvStridedBatched_64(
+ handle, trans, m, n, alpha, A, lda, strideA,
+ x, incx, stridex, beta, y, incy, stridey,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasTSSgemvStridedBatched_64(handle::cublasHandle_t,
- trans::cublasOperation_t, m::Int64, n::Int64,
- alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
- lda::Int64, strideA::Clonglong,
- x::CuPtr{BFloat16}, incx::Int64,
- stridex::Clonglong, beta::CuRef{Cfloat},
- y::CuPtr{Cfloat}, incy::Int64,
- stridey::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasTSSgemvStridedBatched_64(
+ handle::cublasHandle_t,
+ trans::cublasOperation_t, m::Int64, n::Int64,
+ alpha::CuRef{Cfloat}, A::CuPtr{BFloat16},
+ lda::Int64, strideA::Clonglong,
+ x::CuPtr{BFloat16}, incx::Int64,
+ stridex::Clonglong, beta::CuRef{Cfloat},
+ y::CuPtr{Cfloat}, incy::Int64,
+ stridey::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
@@ -218,14 +250,18 @@ end
C::Ptr{Float16}, ldc::Cint)::cublasStatus_t
end
-@checked function cublasHgemm_64(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc)
+@checked function cublasHgemm_64(
+ handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+ C, ldc
+ )
initialize_context()
- @ccall libcublas.cublasHgemm_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
- alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
- B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
- C::Ptr{Float16}, ldc::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemm_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64, k::Int64,
+ alpha::Ptr{Float16}, A::Ptr{Float16}, lda::Int64,
+ B::Ptr{Float16}, ldb::Int64, beta::Ptr{Float16},
+ C::Ptr{Float16}, ldc::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmBatched(handle, transa, transb, m, n, k, alpha, Aarray, lda,
@@ -241,17 +277,21 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmBatched_64(handle, transa, transb, m, n, k, alpha, Aarray, lda,
- Barray, ldb, beta, Carray, ldc, batchCount)
+@checked function cublasHgemmBatched_64(
+ handle, transa, transb, m, n, k, alpha, Aarray, lda,
+ Barray, ldb, beta, Carray, ldc, batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHgemmBatched_64(handle::cublasHandle_t, transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
- Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
- beta::CuRef{Float16},
- Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemmBatched_64(
+ handle::cublasHandle_t, transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ Aarray::CuPtr{Ptr{Float16}}, lda::Int64,
+ Barray::CuPtr{Ptr{Float16}}, ldb::Int64,
+ beta::CuRef{Float16},
+ Carray::CuPtr{Ptr{Float16}}, ldc::Int64,
+ batchCount::Int64
+ )::cublasStatus_t
end
@checked function cublasHgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda,
@@ -270,18 +310,22 @@ end
batchCount::Cint)::cublasStatus_t
end
-@checked function cublasHgemmStridedBatched_64(handle, transa, transb, m, n, k, alpha, A, lda,
- strideA, B, ldb, strideB, beta, C, ldc, strideC,
- batchCount)
+@checked function cublasHgemmStridedBatched_64(
+ handle, transa, transb, m, n, k, alpha, A, lda,
+ strideA, B, ldb, strideB, beta, C, ldc, strideC,
+ batchCount
+ )
initialize_context()
- @ccall libcublas.cublasHgemmStridedBatched_64(handle::cublasHandle_t,
- transa::cublasOperation_t,
- transb::cublasOperation_t, m::Int64, n::Int64,
- k::Int64, alpha::CuRef{Float16},
- A::CuPtr{Float16}, lda::Int64,
- strideA::Clonglong, B::CuPtr{Float16},
- ldb::Int64, strideB::Clonglong,
- beta::CuRef{Float16}, C::CuPtr{Float16},
- ldc::Int64, strideC::Clonglong,
- batchCount::Int64)::cublasStatus_t
+ @ccall libcublas.cublasHgemmStridedBatched_64(
+ handle::cublasHandle_t,
+ transa::cublasOperation_t,
+ transb::cublasOperation_t, m::Int64, n::Int64,
+ k::Int64, alpha::CuRef{Float16},
+ A::CuPtr{Float16}, lda::Int64,
+ strideA::Clonglong, B::CuPtr{Float16},
+ ldb::Int64, strideB::Clonglong,
+ beta::CuRef{Float16}, C::CuPtr{Float16},
+ ldc::Int64, strideC::Clonglong,
+ batchCount::Int64
+ )::cublasStatus_t
end
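For context on what these `_64` wrappers are: the ILP64 API pairs each legacy cuBLAS entry point that takes `Cint` sizes with a `_64` variant taking `Int64`. Below is a minimal, hypothetical sketch (not CUDA.jl's actual wrapper code) of the dispatch idea, choosing the `_64` entry point once a dimension no longer fits in a 32-bit `Cint`:

```julia
# Hypothetical illustration only: pick the ILP64 ("_64") cuBLAS entry point
# whenever a dimension would overflow the 32-bit Cint used by the legacy API.
function gemm_entrypoint(m::Integer, n::Integer, k::Integer)
    fits_in_cint = all(x -> x <= typemax(Cint), (m, n, k))
    return fits_in_cint ? :cublasHgemm : :cublasHgemm_64
end

gemm_entrypoint(1024, 1024, 1024)  # => :cublasHgemm
gemm_entrypoint(2^31, 16, 16)      # => :cublasHgemm_64
```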
CUDA.jl Benchmarks
| Benchmark suite | Current: fa318bf | Previous: c05359d | Ratio |
|---|---|---|---|
| latency/precompile | 42941278681 ns | 42922336650.5 ns | 1.00 |
| latency/ttfp | 7012501535 ns | 7015168424 ns | 1.00 |
| latency/import | 3593901953 ns | 3571269514 ns | 1.01 |
| integration/volumerhs | 9625712 ns | 9608723 ns | 1.00 |
| integration/byval/slices=1 | 147326 ns | 146920.5 ns | 1.00 |
| integration/byval/slices=3 | 426789 ns | 425845 ns | 1.00 |
| integration/byval/reference | 145182 ns | 145020 ns | 1.00 |
| integration/byval/slices=2 | 286848 ns | 286380 ns | 1.00 |
| integration/cudadevrt | 103616 ns | 103554 ns | 1.00 |
| kernel/indexing | 14395 ns | 14235 ns | 1.01 |
| kernel/indexing_checked | 15083 ns | 14711 ns | 1.03 |
| kernel/occupancy | 674.2675159235669 ns | 672.5506329113924 ns | 1.00 |
| kernel/launch | 2273.222222222222 ns | 2270.3333333333335 ns | 1.00 |
| kernel/rand | 18135 ns | 14669 ns | 1.24 |
| array/reverse/1d | 20027 ns | 19682 ns | 1.02 |
| array/reverse/2d | 25187 ns | 23613.5 ns | 1.07 |
| array/reverse/1d_inplace | 10411 ns | 10461 ns | 1.00 |
| array/reverse/2d_inplace | 12136 ns | 13212 ns | 0.92 |
| array/copy | 20893 ns | 20972 ns | 1.00 |
| array/iteration/findall/int | 157185 ns | 157808 ns | 1.00 |
| array/iteration/findall/bool | 139596 ns | 139837 ns | 1.00 |
| array/iteration/findfirst/int | 163592 ns | 164937 ns | 0.99 |
| array/iteration/findfirst/bool | 164983 ns | 165868 ns | 0.99 |
| array/iteration/scalar | 71931.5 ns | 73041 ns | 0.98 |
| array/iteration/logical | 215679 ns | 214850 ns | 1.00 |
| array/iteration/findmin/1d | 46426.5 ns | 46704 ns | 0.99 |
| array/iteration/findmin/2d | 96415 ns | 96962.5 ns | 0.99 |
| array/reductions/reduce/Int64/1d | 43753 ns | 46033 ns | 0.95 |
| array/reductions/reduce/Int64/dims=1 | 47827.5 ns | 55193 ns | 0.87 |
| array/reductions/reduce/Int64/dims=2 | 62970.5 ns | 62917 ns | 1.00 |
| array/reductions/reduce/Int64/dims=1L | 89100 ns | 88869 ns | 1.00 |
| array/reductions/reduce/Int64/dims=2L | 88718 ns | 87079 ns | 1.02 |
| array/reductions/reduce/Float32/1d | 34717.5 ns | 34606 ns | 1.00 |
| array/reductions/reduce/Float32/dims=1 | 52089 ns | 43875 ns | 1.19 |
| array/reductions/reduce/Float32/dims=2 | 60095 ns | 59705 ns | 1.01 |
| array/reductions/reduce/Float32/dims=1L | 52647 ns | 52260 ns | 1.01 |
| array/reductions/reduce/Float32/dims=2L | 70580.5 ns | 70051.5 ns | 1.01 |
| array/reductions/mapreduce/Int64/1d | 43883.5 ns | 42671.5 ns | 1.03 |
| array/reductions/mapreduce/Int64/dims=1 | 53014.5 ns | 45980 ns | 1.15 |
| array/reductions/mapreduce/Int64/dims=2 | 62792.5 ns | 62143.5 ns | 1.01 |
| array/reductions/mapreduce/Int64/dims=1L | 89136 ns | 88812 ns | 1.00 |
| array/reductions/mapreduce/Int64/dims=2L | 87839 ns | 86818 ns | 1.01 |
| array/reductions/mapreduce/Float32/1d | 34925 ns | 34742 ns | 1.01 |
| array/reductions/mapreduce/Float32/dims=1 | 42033 ns | 43090.5 ns | 0.98 |
| array/reductions/mapreduce/Float32/dims=2 | 60170 ns | 60061 ns | 1.00 |
| array/reductions/mapreduce/Float32/dims=1L | 53055 ns | 52528 ns | 1.01 |
| array/reductions/mapreduce/Float32/dims=2L | 70741 ns | 70191 ns | 1.01 |
| array/broadcast | 20137 ns | 20155 ns | 1.00 |
| array/copyto!/gpu_to_gpu | 12817 ns | 11294 ns | 1.13 |
| array/copyto!/cpu_to_gpu | 215702 ns | 216503 ns | 1.00 |
| array/copyto!/gpu_to_cpu | 283299 ns | 284237 ns | 1.00 |
| array/accumulate/Int64/1d | 125699 ns | 125529 ns | 1.00 |
| array/accumulate/Int64/dims=1 | 83460 ns | 84037 ns | 0.99 |
| array/accumulate/Int64/dims=2 | 158136 ns | 159166 ns | 0.99 |
| array/accumulate/Int64/dims=1L | 1709339 ns | 1720376 ns | 0.99 |
| array/accumulate/Int64/dims=2L | 966391 ns | 968348 ns | 1.00 |
| array/accumulate/Float32/1d | 110019 ns | 109984 ns | 1.00 |
| array/accumulate/Float32/dims=1 | 80843 ns | 81082 ns | 1.00 |
| array/accumulate/Float32/dims=2 | 148409.5 ns | 148760 ns | 1.00 |
| array/accumulate/Float32/dims=1L | 1618389 ns | 1629307.5 ns | 0.99 |
| array/accumulate/Float32/dims=2L | 698983 ns | 701479 ns | 1.00 |
| array/construct | 1330.3 ns | 1287.2 ns | 1.03 |
| array/random/randn/Float32 | 44727.5 ns | 44176 ns | 1.01 |
| array/random/randn!/Float32 | 25125 ns | 24930 ns | 1.01 |
| array/random/rand!/Int64 | 27517 ns | 27547 ns | 1.00 |
| array/random/rand!/Float32 | 8727 ns | 8724.666666666666 ns | 1.00 |
| array/random/rand/Int64 | 30180 ns | 30114 ns | 1.00 |
| array/random/rand/Float32 | 13154 ns | 13059 ns | 1.01 |
| array/permutedims/4d | 60328 ns | 60761 ns | 0.99 |
| array/permutedims/2d | 54408 ns | 54037 ns | 1.01 |
| array/permutedims/3d | 55270 ns | 54954 ns | 1.01 |
| array/sorting/1d | 2757342.5 ns | 2756544 ns | 1.00 |
| array/sorting/by | 3344342 ns | 3343249 ns | 1.00 |
| array/sorting/2d | 1080760 ns | 1080799 ns | 1.00 |
| cuda/synchronization/stream/auto | 1039.5 ns | 1040.3 ns | 1.00 |
| cuda/synchronization/stream/nonblocking | 7651.9 ns | 7220 ns | 1.06 |
| cuda/synchronization/stream/blocking | 814.8390804597701 ns | 802.3333333333334 ns | 1.02 |
| cuda/synchronization/context/auto | 1182 ns | 1203.5 ns | 0.98 |
| cuda/synchronization/context/nonblocking | 8322.7 ns | 7276.700000000001 ns | 1.14 |
| cuda/synchronization/context/blocking | 930.6428571428571 ns | 900.4347826086956 ns | 1.03 |
This comment was automatically generated by workflow using github-action-benchmark.
Codecov Report

✅ All modified and coverable lines are covered by tests.

Additional details and impacted files:

@@            Coverage Diff            @@
##           master    #2845     +/-   ##
==========================================
+ Coverage   77.39%   89.64%   +12.25%
==========================================
  Files         150      150
  Lines       13124    13237     +113
==========================================
+ Hits        10157    11866    +1709
+ Misses       2967     1371    -1596

☔ View full report in Codecov by Sentry.
I checked the symbols with
nm -D .../libcusolver.so
and it seems that they are in the library.
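If it helps, the same kind of check can be repeated from Julia. This is a minimal sketch only; the library soname and the symbols listed are illustrative and may need adjusting for a local installation:

```julia
# Minimal sketch: probe a shared library for the ILP64 ("_64") entry points.
# The soname below is illustrative; substitute the full path of the library
# shipped with your CUDA installation if it is not on the loader path.
using Libdl

lib = Libdl.dlopen("libcublas.so")
for sym in (:cublasHgemm_64, :cublasHgemmBatched_64, :cublasHSHgemvBatched_64)
    ptr = Libdl.dlsym(lib, sym; throw_error = false)
    println(sym, " => ", ptr === nothing ? "missing" : "exported")
end
Libdl.dlclose(lib)
```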